home *** CD-ROM | disk | FTP | other *** search
-
- /********************************************************************/
- /* CODE CONTAINED IN FILE NAMED: WSSTRIP.C */
- /* */
- /* DATE FINISHED 1/26/87 BY: Tadas Osmolskis */
- /* */
- /* PURPOSE: Cleans up Wordstar files. Strips high bits, gets rid */
- /* of formatting special characters, and deletes lines */
- /* that begin with periods (dot commands). Provides simple */
- /* minded line, word and sentence counts. */
- /* */
- /* COMPILER USED: Datalight C, version 2.12 */
- /* */
- /* OTHER FILES: HGETSTR.C - function which gets a string from */
- /* the console, without a carriage */
- /* return at the end. From Hunt, */
- /* The C Toolbox (Addison-Wesley,1985) */
- /* */
- /* TESTING CONDUCTED: Tested on heavily formatted WS file, on files */
- /* with embedded dot commands, (including those */
- /* with dot commands at beginning of file), and */
- /* on file containing only an EOF. All input */
- /* modes tested. */
- /* */
- /* KNOWN LIMITATIONS: Not the fastest program known to humankind. */
- /* */
- /* OTHER COMMENTS: Should be fairly portable to any compiler that */
- /* supports floats & bit manipulations. */
- /* */
- /********************************************************************/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
-
/* Home-grown boolean type and its two values. */
#define BOOL  int
#define TRUE  1
#define FALSE 0

/*
 * Character-class tests.  All three expand to an expression that reads a
 * variable named `curchar`, so they may only be used where such a variable
 * is in scope.
 */

/* Sentence terminators: period, exclamation mark, question mark. */
#define SENTEND ( curchar == '.' \
               || curchar == '!' \
               || curchar == '?' )

/* White space: blank, tab or newline. */
#define WHITESP ( curchar == ' '  \
               || curchar == '\t' \
               || curchar == '\n' )

/* Characters that close a word: blank, comma, semicolon, colon, ')'. */
#define WORDEND ( curchar == ' ' \
               || curchar == ',' \
               || curchar == ';' \
               || curchar == ':' \
               || curchar == ')' )

/* ASCII carriage return and line feed. */
#define CR 0x0D
#define LF 0x0A

/* Bitwise-ANDing a character-holding int with MASK clears its high bit. */
#define MASK 0x7F

/* Debug switch; set to 0 (off). */
#define DEBUG 0

/*
 * Program name used in user messages.  The original author notes that
 * MS-DOS C compilers of the day did not supply the program name in
 * argv[0], so it is spelled out here.  main() passes it to
 * get_file_names() and also prints it itself.
 */
char pgmname[] = "wsstrip";
-
-
- main (argc, argv)
- int argc;
- char *argv[];
-
- { /** STARTBODY main **/
-
- int curchar;
- int get_file_names();
- BOOL inword = FALSE;
- BOOL is_space();
- FILE *infile;
- FILE *outfile;
- FILE *file_array[2];
- long count_words();
- long numchars = 0L;
- long numlines = 0L;
- long numsent = 0L;
- long numwords = 0L;
- char temp;
-
-
- /** If the user didn't specify any file names on the command line, **/
- /** the program will prompt for an input and output file name. **/
- /** If only one file name is given on the command line, output will **/
- /** be routed to stdout, so that it can be redirected. If both file **/
- /** names are on the command line, the first name is the input file **/
- /** name, and the second is the output file name. **/
-
- if (argc < 2)
- {
- get_file_names(file_array,pgmname);
- infile = file_array[0];
- outfile = file_array[1];
- }
- else
- if (argc == 2)
- {
- infile = fopen(argv[1],"r");
- if (! infile )
- {
- fprintf(stderr,"\nThe file named %s doesn't seem to be ",argv[1]);
- fprintf(stderr,"on this disk or directory.\nYou must be ");
- fprintf(stderr,"on the same drive/directory as the file ");
- fprintf(stderr,"that you want to process.\nCheck the directory ");
- fprintf(stderr,"and make sure that you are where you want to ");
- fprintf(stderr,"be,\nand that you spelled the file name correctly.");
- exit();
- }
- outfile = stdout; /* to the tube, unless you tell it otherwise */
- }
- else
- {
- infile = fopen(argv[1],"r");
- if (! infile )
- {
- fprintf(stderr,"\nThe file named %s doesn't seem to be ",argv[1]);
- fprintf(stderr,"on this disk or directory.\nYou must be ");
- fprintf(stderr,"on the same drive and directory as the file ");
- fprintf(stderr,"that you want to process.\nCheck the directory ");
- fprintf(stderr,"and make sure that you are where you want to ");
- fprintf(stderr,"be,\nand that you spelled the file name correctly.");
- exit();
- }
- if (outfile = fopen(argv[2],"r"))
- {
- fprintf(stderr,"\nThe file you want to write to, %s, ",argv[2]);
- fprintf(stderr,"already exists. Pick a \ndifferent name and ");
- fprintf(stderr,"run %s again",pgmname);
- fclose(outfile);
- exit();
- }
- else
- outfile = fopen(argv[2],"w");
- }
-
- /* Test the first character to see if it's a Wordstar "dot command" */
- /* The general idea is that temp holds the first character read */
- /* until the first line is read (a "state variable") if the first */
- /* character is a period (which means it's a dot command). Note */
- /* that, after the first line is read and thrown away, the program */
- /* tests the first char of the next line as well. (Wordstar dot */
- /* commands tend to cluster at the top of files). */
- /* If it isn't a dot command, then we put back the character & */
- /* pretend nothing happened. */
-
- temp = getc(infile);
- while (temp == '.') /* If it's a dot command ... */
- {
- curchar = temp; /* initialize curchar to something we know */
- /* isn't a "\n" */
-
- while (curchar != '\n') /* while we're still in the dot command */
- /* line */
- {
- curchar = getc(infile); /*read it and throw it out */
- }
- temp =getc(infile); /* Maybe the first char in the next line */
- /* is a dot command as well .... */
- }
-
- ungetc(temp, infile); /* We can start processing the file now */
-
- curchar = getc(infile); /*Priming read. Datalight C V2.12 didn't */
- /* like the read in the following "while" */
-
- /** Read the file, strip the high bits, and count characters, sentences, **/
- /** lines, and words. **/
-
- while (curchar != EOF)
- {
- curchar &= MASK; /* Strip the high bit. No more */
- /* wordstar funny characters */
- /* Check for "soft carriage returns" */
-
-
- if (curchar == CR)
- if ((temp = getc(infile)) == LF) /* try it */
- curchar = '\n';
- else
- ungetc (temp, infile); /* put it back if not */
-
- /* Now, make sure it's a print character or a legitimate control character */
- /* and not a happy face or some other weirdness caused by Wordstar format */
- /* codes. */
-
- if ( ( (curchar >= ' ') && (curchar <= '~') )
- || (curchar == '\r')
- || (curchar == '\t')
- || (curchar == '\n')
- || (curchar == '\f'))
- putc(curchar,outfile);
-
-
- if (!WHITESP) numchars++; /* Count characters... */
-
- if (SENTEND) numsent++; /* sentences... */
-
- if (curchar == '\n')
- {
- numlines++; /* lines... */
-
- /* and while we're here, let's kill off lines with Wordstar */
- /* dot commands... */
-
- temp = getc(infile);
- if (temp == '.')
- {
- while (curchar = getc(infile) != '\n')
- ; /*throw it out */
- }
- else
- {
- ungetc(temp,infile); /* It's not a dot in col 1, so put it */
- } /* back ... */
- } /* end of the "\n" processing; back to counting */
-
- /* ...words, which are not as simple-minded as the above three. */
-
- if ( (is_space (curchar)) || (WORDEND) ) /* We're clearly not */
- inword = FALSE; /* within a word ... */
- else
- if (!inword) /* If we're not already in a word; ie.,
- if this is the first character */
- {
- inword = TRUE; /* let's make it TRUE */
- numwords++; /* and increment the counter */
- }
-
- curchar = getc(infile); /* Read the next character */
-
- } /* End of "while not EOF" loop*/
-
-
-
- /** Print the count of characters, sentences, lines and words, and **/
- /** compute and print the average number of characters per word. **/
-
-
- printf ("\n\t\t\tNumber of characters: %ld\n",
- numchars);
-
- printf ("\t\t\tNumber of lines: %ld\n",
- numlines);
-
- printf ("\t\t\tNumber of words: %ld\n",
- numwords);
-
- printf ("\t\t\tNumber of sentences: %ld\n",
- numsent);
-
- if (numwords > 0L) /* Dividing by zero isn't such a wonderful idea... */
- /* Also, note the casts to type float in the printf */
- /* statement */
- {
- printf ("\t\t\tAverage characters per word: %4.1f\n",
- (float)numchars/(float)numwords);
- }
- else
- {
- fprintf(stderr,"Your file, %s, doesn't have\n any words in it. ",argv[1]);
- fprintf(stderr,"Make sure you're working with the right file");
- }
-
-
- /** Now, the cleanup, which for this program, is to close the files. **/
-
- fclose(infile);
- fclose(outfile);
-
- } /* ENDMAIN */
-
- BOOL is_space(ch)
- char ch;
- {
- return ((ch == ' '|| ch == '\t' || ch == '\n') ? TRUE : FALSE);
- }
-
-
/*
 * get_file_names() -- prompt on the console for an input and an output
 * file name and open both files.
 *
 *   file_array   : receives the open streams; [0] is the input file
 *                  (opened for reading), [1] the output file (opened for
 *                  writing)
 *   program_name : name to show in messages
 *
 * Returns 0 once both files are open.  If the input file cannot be
 * opened, the output file already exists, or the output file cannot be
 * created, a message is written to stderr and the program ends through
 * exit(1).
 */
int get_file_names(file_array,program_name)
FILE * file_array[2];
char * program_name;
{
    /* Room for 12 characters plus the terminator: an MS-DOS name of   */
    /* 8 characters, a period and a 3 character extension.             */
    char in_file_name[13], out_file_name[13];
    int getstr();   /* From HGETSTR.C; returns the count of characters */
                    /* in the string.  NOTE(review): presumably stores */
                    /* at most the given number of characters plus a   */
                    /* terminator -- confirm against HGETSTR.C.        */

    printf ("You haven't given %s an input and output file name\n", program_name);
    printf ("What is the name of the file you want to process?: ");
    fflush(stdout);     /* The prompt has no newline; show it before reading */
    (void) getstr(in_file_name,12);

    file_array[0] = fopen(in_file_name,"r");
    if (!file_array[0])
    {
        fprintf(stderr,"\nThe file named %s doesn't seem to be ",in_file_name);
        fprintf(stderr,"on this disk or directory.\nYou must be ");
        fprintf(stderr,"on the same drive / directory as the file ");
        fprintf(stderr,"that you're looking for.\nCheck the directory ");
        fprintf(stderr,"and make sure that you are where you want to ");
        fprintf(stderr,"be,\nand that you spelled the file name correctly.");
        exit(1);
    }

    printf ("What name do you want the output file to have?: ");
    fflush(stdout);
    (void) getstr(out_file_name,12);

    /* If the name can be opened for reading, the file already exists */
    /* and must not be overwritten.                                   */
    file_array[1] = fopen(out_file_name,"r");
    if (file_array[1])
    {
        fprintf(stderr,"\nThe file you want to write to, %s, ",out_file_name);
        fprintf(stderr,"already exists. Pick a different name and ");
        fprintf(stderr,"run %s again",program_name);
        fclose(file_array[1]);
        exit(1);
    }

    file_array[1] = fopen(out_file_name,"w");
    if (!file_array[1])
    {
        fprintf(stderr,"\nThe output file, %s, could not be created.",out_file_name);
        exit(1);
    }

    return 0;
}   /*** END OF get_file_names() ***/